In [1]:
import pandas as pd
from scrapenhl2.scrape import autoupdate, schedules, team_info, players
from scrapenhl2.manipulate import manipulate as manip
The purpose of this notebook is to get game-by-game 5v5 TOI (time on ice) totals, by player and by team, for every game since 2012-13. This information can be derived directly from the 5v5 player log.
In [2]:
# Refresh data first if it is stale (both lines are opt-in; loop over seasons if needed)
# autoupdate.autoupdate() # Comment in if needed, and loop if needed
# manip.get_5v5_player_log(2017, force_create) # Comment in if needed, and loop if needed

# Season start years covered: 2012 through 2017 inclusive
seasons = range(2012, 2018)

# Load one frame per season, tagging each row with its season so the
# concatenated frames can still be grouped/merged by (Season, Game).
season_logs = [manip.get_5v5_player_log(yr).assign(Season=yr) for yr in seasons]
season_schedules = [schedules.get_season_schedule(yr).assign(Season=yr) for yr in seasons]

log = pd.concat(season_logs)
sch = pd.concat(season_schedules)
log.head()
Out[2]:
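All we need to do is aggregate the log to one TOI value per game for the team table, and select and relabel the relevant columns for the individual table: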
In [3]:
# Teams
# One TOI value per (Season, Game): on-ice plus off-ice time for each player row,
# reduced with max. Taking the max (rather than relying on drop_duplicates alone)
# sidesteps tiny floating point differences between rows of the same game.
game_toi = (
    log.assign(TOI=log.TOION + log.TOIOFF)
    .loc[:, ['Season', 'Game', 'TOI']]
    .groupby(['Season', 'Game'], as_index=False)
    .max()
)

# Reshape the schedule to one row per (game, team): the Home and Road columns
# become rows labelled by 'HR', then the per-game TOI is attached.
team_rows = (
    sch.loc[:, ['Season', 'Game', 'Home', 'Road']]
    .melt(id_vars=['Season', 'Game'], var_name='HR', value_name='TeamID')
    .merge(game_toi, how='inner', on=['Season', 'Game'])
    .drop_duplicates()
)

# Make names into str, and convert TOI from hours to minutes
teamtoi = (
    team_rows
    .assign(
        Team=team_rows.TeamID.apply(lambda team_id: team_info.team_as_str(team_id)),
        **{'TOI(min)': team_rows.TOI * 60},
    )
    .drop(['TeamID', 'TOI'], axis=1)
)
teamtoi.head()
Out[3]:
In [4]:
# Individuals
# Take an explicit copy of the selected columns: adding columns to a bare
# selection of `log` makes pandas emit SettingWithCopyWarning, because it
# cannot tell whether the write is meant to reach `log` as well.
indivtoi = log[['Season', 'Game', 'PlayerID', 'TOION', 'TeamID']].copy()
# IDs to names and TOI from hours to minutes
indivtoi['Player'] = players.playerlst_as_str(indivtoi.PlayerID.values)
indivtoi['Team'] = indivtoi.TeamID.apply(team_info.team_as_str)
indivtoi['TOI(min)'] = indivtoi.TOION * 60
# Keep only the human-readable columns
indivtoi = indivtoi.drop(['TeamID', 'TOION', 'PlayerID'], axis=1)
indivtoi.head()
Out[4]:
In [5]:
# Write to file
# NOTE(review): these are absolute paths on the author's machine; adjust them
# before running elsewhere.
csv_targets = [
    (teamtoi, '/Users/muneebalam/Desktop/teamtoi.csv'),
    (indivtoi, '/Users/muneebalam/Desktop/indivtoi.csv'),
]
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path)